packages <- c("CIMseq", "CIMseq.data", "tidyverse", "circlize", "printr")
purrr::walk(packages, library, character.only = TRUE)
rm(packages)

##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')

#there are 5 cells that were classified as colon but sorted as SI. These have to
#be removed manually
c <- getData(cObjSng, "classification")
s <- names(c[c %in% c("4", "9")])
i <- which(colnames(getData(cObjSng, "counts")) %in% s)
cObjSng <- CIMseqSinglets(
  getData(cObjSng, "counts")[, -i],
  getData(cObjSng, "counts.ercc")[, -i],
  getData(cObjSng, "dim.red")[-i, ],
  getData(cObjSng, "classification")[-i]
)

Fig 1: classes

p <- plotUnsupervisedClass(cObjSng, cObjMul)
p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.classes.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 2: Cell type gene expression

p <- plotUnsupervisedMarkers(
  cObjSng, cObjMul,
  c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)
ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.markers.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 3: Cell cycle

p <- plotUnsupervisedMarkers(
  cObjSng, cObjMul, c("Mki67"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)
ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.Mki67.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 4: Connections per multiplet

adj <- adjustFractions(cObjSng, cObjMul, sObj)
as.data.frame(table(apply(adj, 1, sum)))
Var1 Freq
0 142
1 571
2 588
3 268
4 94
5 32
6 5
7 3

Fig 5: Fraction histogram

tibble(fractions = c(fractions)) %>%
  ggplot() +
  geom_histogram(aes(fractions), binwidth = 0.01) +
  theme_bw()

Fig 6: Detected cell types vs. cost

tibble(
  nCellTypes = apply(adj, 1, sum),
  cost = getData(sObj, "costs")
) %>%
  ggplot() +
  geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
  scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
  theme_bw()

Fig 7: Estimated cell numbers vs. cost

tibble(
  sample = names(getData(sObj, "costs")),
  cost = unname(getData(sObj, "costs"))
) %>%
  inner_join(
    select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber), 
    by = "sample"
  ) %>%
  mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
  ) +
  theme_bw()

Fig 8: Estimated cell number vs. Detected cell number

ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
nConnections <- apply(adj, 1, sum)
nConnections <- nConnections[match(ercc$sample, names(nConnections))]
tibble(
  detectedConnections = round(nConnections),
  estimatedCellNumber = round(ercc$estimatedCellNumber)
) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(ercc$estimatedCellNumber))
  ) +
  scale_y_continuous(
    name = "Detected cell number",
    breaks = 0:max(round(nConnections))
  ) +
  theme_bw()

Fig 9: Detected cell number vs. Total counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.counts = colSums(getData(cObjMul, "counts"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total counts") +
  theme_bw()

Fig 10: Detected cell number vs. Total ERCC counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.ercc = colSums(getData(cObjMul, "counts.ercc"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total ERCC counts") +
  theme_bw()

Fig 11: Relative frequency of singlets vs. deconvoluted multiplets

singlets <- c(table(getData(cObjSng, "classification")))
singlets <- singlets / sum(singlets)
deconv <- colSums(adjustFractions(cObjSng, cObjMul, sObj))
deconv <- deconv[match(names(singlets), names(deconv))]
deconv <- deconv / sum(deconv)
if(!identical(names(singlets), names(deconv))) stop("name mismatch")

p <- tibble(
  class = names(singlets),
  singlet.freq = singlets,
  multiplet.freq = deconv
) %>%
  ggplot() +
  geom_point(aes(singlet.freq, multiplet.freq, colour = class), size = 3) +
  scale_colour_manual(values = col40()) +
  xlim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
  ylim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
  geom_abline(slope = 1, intercept = 0, lty = 3, colour = "grey") +
  labs(x = "Singlet relative frequency", y = "Multiplet relative frequency") +
  guides(colour = guide_legend(title = "Cell Type")) +
  theme_bw()

p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.sngMulRelFreq.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 12: All connections

plotSwarmCircos(sObj, cObjSng, cObjMul, classOrder = cOrder)
## Joining, by = "class"

Fig 13: Filtered

Only detected duplicates, triplicates, and quadruplicates.
ERCC estimated cell number set to max 4.
Weight cutoff = 10.

adj <- adjustFractions(cObjSng, cObjMul, sObj, binary = TRUE)
samples <- rownames(adj)
rs <- rowSums(adj)
keep <- rs == 2 | rs == 3 | rs == 4

plotSwarmCircos(
  filterSwarm(sObj, keep), cObjSng, cObjMul, weightCut = 10, 
  classOrder = cOrder, theoretical.max = 4
)
## Joining, by = "class"

pdf('../figures/MGA.enge20.circos.pdf', width = 9.5, height = 9.5)
plotSwarmCircos(
  filterSwarm(sObj, keep), cObjSng, cObjMul, weightCut = 10, 
  classOrder = cOrder, theoretical.max = 4
)
## Joining, by = "class"
dev.off()
## quartz_off_screen 
##                 2